; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-LE-P8
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-LE-P9
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-BE-P8
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-BE-P9

; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P8
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P9
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P8
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P9

; Shuffle of two <4 x i32> values down to a <2 x i32> result.
;   operand 0: an i16 (loaded as <2 x i8>) placed in lane 0 of an otherwise
;              undef <8 x i16>, then reinterpreted as <4 x i32>.
;   operand 1: a <2 x i32> loaded from %a and widened to <4 x i32>.
; The mask <4, 0> takes lane 0 of operand 1 followed by lane 0 of operand 0.
; Both the first load and the final store use an undef pointer.
define void @test_none_v8i16(ptr %a) {
; CHECK-LE-P8-LABEL: test_none_v8i16:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
; CHECK-LE-P8-NEXT:    lxsdx v2, 0, r3
; CHECK-LE-P8-NEXT:    addi r4, r4, .LCPI0_0@toc@l
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT:    lhz r4, 0(r3)
; CHECK-LE-P8-NEXT:    mtvsrd v4, r4
; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
; CHECK-LE-P8-NEXT:    stfdx f0, 0, r3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_none_v8i16:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lxsihzx f0, 0, r3
; CHECK-LE-P9-NEXT:    lfd f1, 0(r3)
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI0_0@toc@l
; CHECK-LE-P9-NEXT:    lxv vs2, 0(r3)
; CHECK-LE-P9-NEXT:    xxperm vs1, vs0, vs2
; CHECK-LE-P9-NEXT:    xxswapd vs0, vs1
; CHECK-LE-P9-NEXT:    stfd f0, 0(r3)
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_none_v8i16:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lhz r4, 0(r3)
; CHECK-BE-P8-NEXT:    lfdx f0, 0, r3
; CHECK-BE-P8-NEXT:    sldi r3, r4, 48
; CHECK-BE-P8-NEXT:    mtfprd f1, r3
; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-BE-P8-NEXT:    stfdx f0, 0, r3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_none_v8i16:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-BE-P9-NEXT:    lfd f0, 0(r3)
; CHECK-BE-P9-NEXT:    vsplth v2, v2, 3
; CHECK-BE-P9-NEXT:    xxmrghw vs0, vs0, v2
; CHECK-BE-P9-NEXT:    stfd f0, 0(r3)
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_none_v8i16:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lhz r4, 0(r3)
; CHECK-AIX-64-P8-NEXT:    lfdx f0, 0, r3
; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 48
; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-AIX-64-P8-NEXT:    stfdx f0, 0, r3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_none_v8i16:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-AIX-64-P9-NEXT:    lfd f0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    vsplth v2, v2, 3
; CHECK-AIX-64-P9-NEXT:    xxmrghw vs0, vs0, v2
; CHECK-AIX-64-P9-NEXT:    stfd f0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_none_v8i16:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r4
; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -16
; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r4
; CHECK-AIX-32-P8-NEXT:    stw r3, 0(r3)
; CHECK-AIX-32-P8-NEXT:    lwz r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    stw r3, 0(r3)
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_none_v8i16:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lhz r4, 0(r3)
; CHECK-AIX-32-P9-NEXT:    sth r4, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lxv vs0, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
; CHECK-AIX-32-P9-NEXT:    stw r3, 0(r3)
; CHECK-AIX-32-P9-NEXT:    stxv vs0, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lwz r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    stw r3, 0(r3)
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Operand 0: i16 scalar in lane 0 of an undef <8 x i16>, viewed as <4 x i32>.
  %0 = load <2 x i8>, ptr undef, align 1
  %tmp0_1 = bitcast <2 x i8> %0 to i16
  %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0
  %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32>
  ; Operand 1: the two loaded words repeated to fill four lanes.
  %1 = load <2 x i32>, ptr %a
  %tmp1_1 = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ; Mask index 4 is lane 0 of operand 1; index 0 is lane 0 of operand 0.
  %2 = shufflevector <4 x i32> %tmp0_3, <4 x i32> %tmp1_1, <2 x i32> <i32 4, i32 0>
  store <2 x i32> %2, ptr undef, align 4
  ret void
}

; Interleaving shuffle of two <4 x i32> values.
;   operand 0: an i16 (loaded as <2 x i8>) placed in lane 0 of an otherwise
;              undef <8 x i16>, then reinterpreted as <4 x i32>.
;   operand 1: a full <4 x i32> loaded from %a with byte alignment.
; The mask <0, 4, 1, 5> alternates lanes 0 and 1 of the two operands.
; Both the first load and the final store use an undef pointer.
define void @test_v8i16_none(ptr %a) {
; CHECK-LE-P8-LABEL: test_v8i16_none:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    lhz r3, 0(r3)
; CHECK-LE-P8-NEXT:    mtfprd f1, r3
; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs0, vs1
; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v8i16_none:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lxsihzx f0, 0, r3
; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-LE-P9-NEXT:    xxmrglw vs0, vs1, vs0
; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v8i16_none:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lhz r4, 0(r3)
; CHECK-BE-P8-NEXT:    lxvw4x vs0, 0, r3
; CHECK-BE-P8-NEXT:    mtfprwz f1, r4
; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-BE-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v8i16_none:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lxsihzx f0, 0, r3
; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-BE-P9-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_none:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lhz r4, 0(r3)
; CHECK-AIX-64-P8-NEXT:    lxvw4x vs0, 0, r3
; CHECK-AIX-64-P8-NEXT:    mtfprwz f1, r4
; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-AIX-64-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v8i16_none:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lxsihzx f0, 0, r3
; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v8i16_none:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
; CHECK-AIX-32-P8-NEXT:    mtfprwz f1, r4
; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v8i16_none:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lxsihzx f0, 0, r3
; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Operand 0: i16 scalar in lane 0 of an undef <8 x i16>, viewed as <4 x i32>.
  %0 = load <2 x i8>, ptr undef, align 1
  %tmp0_1 = bitcast <2 x i8> %0 to i16
  %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0
  %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32>
  ; Operand 1: an ordinary full-width vector load.
  %1 = load <4 x i32>, ptr %a, align 1
  ; Result lanes: op0[0], op1[0], op0[1], op1[1].
  %2 = shufflevector <4 x i32> %tmp0_3, <4 x i32> %1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  store <4 x i32> %2, ptr undef, align 4
  ret void
}

; Shuffle of two <8 x i16> values, both derived from the <2 x i32> argument.
;   operand 0: %vec concatenated with itself, reinterpreted as <8 x i16>.
;   operand 1: element 0 of %vec, reinterpreted as <2 x i16> and widened to
;              eight lanes by a shuffle of that value with itself.
; The mask keeps lanes 0-5 of operand 0 and appends lanes 0-1 of operand 1.
; The result is stored to %ptr1 with 16-byte alignment.
define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
; CHECK-LE-P8-LABEL: test_none_v4i32:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI2_0@toc@l
; CHECK-LE-P8-NEXT:    lxvd2x vs1, 0, r3
; CHECK-LE-P8-NEXT:    mffprwz r3, f0
; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
; CHECK-LE-P8-NEXT:    mtvsrwz v4, r3
; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r5
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_none_v4i32:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    li r3, 0
; CHECK-LE-P9-NEXT:    vextuwrx r3, r3, v2
; CHECK-LE-P9-NEXT:    mtfprwz f0, r3
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI2_0@toc@l
; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-LE-P9-NEXT:    stxv v2, 0(r5)
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_none_v4i32:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    xxsldwi vs0, v2, v2, 3
; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI2_0@toc@l
; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-BE-P8-NEXT:    mffprwz r4, f0
; CHECK-BE-P8-NEXT:    mtvsrwz v4, r4
; CHECK-BE-P8-NEXT:    vperm v2, v2, v4, v3
; CHECK-BE-P8-NEXT:    stxvw4x v2, 0, r5
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_none_v4i32:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    li r3, 0
; CHECK-BE-P9-NEXT:    vextuwlx r3, r3, v2
; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI2_0@toc@l
; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-BE-P9-NEXT:    xxperm vs0, v2, vs1
; CHECK-BE-P9-NEXT:    stxv vs0, 0(r5)
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    xxsldwi vs0, v2, v2, 3
; CHECK-AIX-64-P8-NEXT:    ld r4, L..C0(r2) # %const.0
; CHECK-AIX-64-P8-NEXT:    mffprwz r5, f0
; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r5
; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v4, v3
; CHECK-AIX-64-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_none_v4i32:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    li r4, 0
; CHECK-AIX-64-P9-NEXT:    vextuwlx r4, r4, v2
; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r4
; CHECK-AIX-64-P9-NEXT:    ld r4, L..C0(r2) # %const.0
; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r4)
; CHECK-AIX-64-P9-NEXT:    xxperm vs0, v2, vs1
; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_none_v4i32:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lwz r4, L..C0(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r5
; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r5
; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r4
; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_none_v4i32:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    addi r4, r1, -16
; CHECK-AIX-32-P9-NEXT:    stxv v2, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r4
; CHECK-AIX-32-P9-NEXT:    lwz r4, L..C0(r2) # %const.0
; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r4)
; CHECK-AIX-32-P9-NEXT:    xxperm vs0, v2, vs1
; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Operand 1 (%2): word 0 of %vec as two halfwords, repeated across 8 lanes.
  %0 = extractelement <2 x i32> %vec, i64 0
  %1 = bitcast i32 %0 to <2 x i16>
  %2 = shufflevector <2 x i16> %1, <2 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ; Operand 0 (%4): %vec concatenated with itself, viewed as halfwords.
  %3 = shufflevector <2 x i32> %vec, <2 x i32> %vec, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = bitcast <4 x i32> %3 to <8 x i16>
  ; Lanes 0-5 come from %4; mask indices 8 and 9 select lanes 0 and 1 of %2.
  %5 = shufflevector <8 x i16> %4, <8 x i16> %2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
  store <8 x i16> %5, ptr %ptr1, align 16
  ret void
}

; Shuffle of two <8 x i16> values, both derived from the <2 x i32> argument.
;   operand 0: element 0 of %vec, reinterpreted as <2 x i16> and widened to
;              eight lanes by a shuffle of that value with itself.
;   operand 1: %vec concatenated with itself, reinterpreted as <8 x i16>.
; The mask takes lanes 0-1 of operand 0 followed by lanes 0-5 of operand 1.
; The result is stored to %ptr1 with 16-byte alignment.
define void @test_v4i32_none(<2 x i32> %vec, ptr %ptr1) {
; CHECK-LE-P8-LABEL: test_v4i32_none:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI3_0@toc@l
; CHECK-LE-P8-NEXT:    lxvd2x vs1, 0, r3
; CHECK-LE-P8-NEXT:    mffprwz r3, f0
; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
; CHECK-LE-P8-NEXT:    mtvsrwz v4, r3
; CHECK-LE-P8-NEXT:    vperm v2, v2, v4, v3
; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r5
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v4i32_none:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    li r3, 0
; CHECK-LE-P9-NEXT:    vextuwrx r3, r3, v2
; CHECK-LE-P9-NEXT:    mtfprwz f0, r3
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI3_0@toc@l
; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-LE-P9-NEXT:    xxperm vs0, v2, vs1
; CHECK-LE-P9-NEXT:    stxv vs0, 0(r5)
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v4i32_none:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    xxsldwi vs0, v2, v2, 3
; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI3_0@toc@l
; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-BE-P8-NEXT:    mffprwz r4, f0
; CHECK-BE-P8-NEXT:    mtvsrwz v4, r4
; CHECK-BE-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-BE-P8-NEXT:    stxvw4x v2, 0, r5
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v4i32_none:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    li r3, 0
; CHECK-BE-P9-NEXT:    vextuwlx r3, r3, v2
; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI3_0@toc@l
; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-BE-P9-NEXT:    stxv v2, 0(r5)
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v4i32_none:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    xxsldwi vs0, v2, v2, 3
; CHECK-AIX-64-P8-NEXT:    ld r4, L..C1(r2) # %const.0
; CHECK-AIX-64-P8-NEXT:    mffprwz r5, f0
; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r5
; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-AIX-64-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v4i32_none:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    li r4, 0
; CHECK-AIX-64-P9-NEXT:    vextuwlx r4, r4, v2
; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r4
; CHECK-AIX-64-P9-NEXT:    ld r4, L..C1(r2) # %const.0
; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r4)
; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT:    stxv v2, 0(r3)
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v4i32_none:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lwz r4, L..C1(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r5
; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r5
; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r4
; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v4i32_none:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    addi r4, r1, -16
; CHECK-AIX-32-P9-NEXT:    stxv v2, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r4
; CHECK-AIX-32-P9-NEXT:    lwz r4, L..C1(r2) # %const.0
; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r4)
; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-AIX-32-P9-NEXT:    stxv v2, 0(r3)
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Operand 0 (%2): word 0 of %vec as two halfwords, repeated across 8 lanes.
  %0 = extractelement <2 x i32> %vec, i64 0
  %1 = bitcast i32 %0 to <2 x i16>
  %2 = shufflevector <2 x i16> %1, <2 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ; Operand 1 (%4): %vec concatenated with itself, viewed as halfwords.
  %3 = shufflevector <2 x i32> %vec, <2 x i32> %vec, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = bitcast <4 x i32> %3 to <8 x i16>
  ; Lanes 0-1 come from %2; mask indices 8-13 select lanes 0-5 of %4.
  %5 = shufflevector <8 x i16> %2, <8 x i16> %4, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13>
  store <8 x i16> %5, ptr %ptr1, align 16
  ret void
}

; Shuffle of two <2 x i32> values into a <4 x i32> result.
;   operand 0: a <2 x i32> loaded from %ptr.
;   operand 1: %vec with %v1 inserted into lane 0.
; The mask <3, 2, 2, 0> yields op1[1], op1[0], op1[0], op0[0].
; The result is stored through an undef pointer and the function ends in
; `unreachable`, so the expected sequences below stop at the store and
; contain no return instruction.
; Attribute group #0 is not defined in the visible portion of this file.
define void @test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_addr #0 {
; CHECK-LE-P8-LABEL: test_none_v2i64:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI4_0@toc@ha
; CHECK-LE-P8-NEXT:    mtvsrwz v4, r4
; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI4_1@toc@ha
; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI4_0@toc@l
; CHECK-LE-P8-NEXT:    addi r4, r4, .LCPI4_1@toc@l
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT:    vperm v2, v2, v4, v3
; CHECK-LE-P8-NEXT:    lxsdx v3, 0, r3
; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
;
; CHECK-LE-P9-LABEL: test_none_v2i64:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lfd f0, 0(r3)
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
; CHECK-LE-P9-NEXT:    mtfprwz f1, r4
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI4_0@toc@l
; CHECK-LE-P9-NEXT:    xxinsertw v2, vs1, 12
; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-LE-P9-NEXT:    stxv v2, 0(r3)
;
; CHECK-BE-P8-LABEL: test_none_v2i64:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI4_0@toc@ha
; CHECK-BE-P8-NEXT:    mtvsrwz v4, r4
; CHECK-BE-P8-NEXT:    addis r4, r2, .LCPI4_1@toc@ha
; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI4_0@toc@l
; CHECK-BE-P8-NEXT:    addi r4, r4, .LCPI4_1@toc@l
; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r5
; CHECK-BE-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-BE-P8-NEXT:    lxsdx v3, 0, r3
; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r4
; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
; CHECK-BE-P8-NEXT:    stxvw4x v2, 0, r3
;
; CHECK-BE-P9-LABEL: test_none_v2i64:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lfd f0, 0(r3)
; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
; CHECK-BE-P9-NEXT:    mtfprwz f1, r4
; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI4_0@toc@l
; CHECK-BE-P9-NEXT:    xxinsertw v2, vs1, 0
; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-BE-P9-NEXT:    xxperm vs0, v2, vs1
; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
;
; CHECK-AIX-64-P8-LABEL: test_none_v2i64:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    ld r5, L..C2(r2) # %const.0
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r4
; CHECK-AIX-64-P8-NEXT:    ld r4, L..C3(r2) # %const.1
; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r5
; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-AIX-64-P8-NEXT:    lxsdx v3, 0, r3
; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r4
; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
; CHECK-AIX-64-P8-NEXT:    stxvw4x v2, 0, r3
;
; CHECK-AIX-64-P9-LABEL: test_none_v2i64:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lfd f0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    ld r3, L..C2(r2) # %const.0
; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r4
; CHECK-AIX-64-P9-NEXT:    xxinsertw v2, vs1, 0
; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT:    xxperm vs0, v2, vs1
; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
;
; CHECK-AIX-32-P8-LABEL: test_none_v2i64:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lwz r5, L..C2(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r3
; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT:    lxvw4x v5, 0, r3
; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C3(r2) # %const.1
; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r5
; CHECK-AIX-32-P8-NEXT:    vperm v2, v5, v2, v4
; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
;
; CHECK-AIX-32-P9-LABEL: test_none_v2i64:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C2(r2) # %const.0
; CHECK-AIX-32-P9-NEXT:    mtfprwz f1, r4
; CHECK-AIX-32-P9-NEXT:    xxinsertw v2, vs1, 0
; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT:    xxperm vs0, v2, vs1
; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
entry:
  ; Operand 0: two words loaded from %ptr.
  %0 = load <2 x i32>, ptr %ptr, align 4
  ; Operand 1: %vec with lane 0 replaced by the scalar argument.
  %tmp = insertelement <2 x i32> %vec, i32 %v1, i32 0
  ; Mask indices 2 and 3 refer to lanes 0 and 1 of %tmp; index 0 is %0[0].
  %1 = shufflevector <2 x i32> %0, <2 x i32> %tmp, <4 x i32> <i32 3, i32 2, i32 2, i32 0>
  store <4 x i32> %1, ptr undef, align 4
  unreachable
}

; Widening shuffle of a single <2 x i32> value with an undef second operand.
; The mask <0, 2, 1, 3> places the two loaded words in result lanes 0 and 2;
; lanes 1 and 3 are selected from the undef operand.
; The function takes no arguments: both the load and the store use an undef
; pointer.
define void @test_v2i64_none() {
; CHECK-LE-P8-LABEL: test_v2i64_none:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    lfdx f0, 0, r3
; CHECK-LE-P8-NEXT:    xxmrghw vs0, vs0, vs0
; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v2i64_none:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lfd f0, 0(r3)
; CHECK-LE-P9-NEXT:    xxmrghw vs0, vs0, vs0
; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v2i64_none:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lfdx f0, 0, r3
; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs0, vs0
; CHECK-BE-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v2i64_none:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lfd f0, 0(r3)
; CHECK-BE-P9-NEXT:    xxmrghw vs0, vs0, vs0
; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v2i64_none:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lfdx f0, 0, r3
; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs0, vs0
; CHECK-AIX-64-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v2i64_none:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lfd f0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    xxmrghw vs0, vs0, vs0
; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v2i64_none:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, 0, r3
; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v2i64_none:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lxvwsx vs0, 0, r3
; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  %0 = load <2 x i32>, ptr undef, align 4
  ; Mask indices 2 and 3 address the undef second operand.
  %1 = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  store <4 x i32> %1, ptr undef, align 4
  ret void
}

; Interleaving shuffle of two <4 x i32> values that are both built the same
; way: an i16 (loaded as <2 x i8>) placed in lane 0 of an otherwise undef
; <8 x i16>, then reinterpreted as <4 x i32>.
;   operand 0: scalar loaded through an undef pointer.
;   operand 1: scalar loaded from %a.
; The mask <0, 4, 1, 5> alternates lanes 0 and 1 of the two operands.
; The result is stored through an undef pointer.
define void @test_v8i16_v8i16(ptr %a) {
; CHECK-LE-P8-LABEL: test_v8i16_v8i16:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    lhz r4, 0(r3)
; CHECK-LE-P8-NEXT:    lhz r3, 0(r3)
; CHECK-LE-P8-NEXT:    mtfprd f0, r4
; CHECK-LE-P8-NEXT:    mtfprd f1, r3
; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs1, vs0
; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v8i16_v8i16:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lxsihzx f0, 0, r3
; CHECK-LE-P9-NEXT:    lxsihzx f1, 0, r3
; CHECK-LE-P9-NEXT:    xxmrglw vs0, vs1, vs0
; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v8i16_v8i16:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lhz r4, 0(r3)
; CHECK-BE-P8-NEXT:    lhz r3, 0(r3)
; CHECK-BE-P8-NEXT:    mtfprwz f0, r4
; CHECK-BE-P8-NEXT:    mtfprwz f1, r3
; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-BE-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v8i16_v8i16:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lxsihzx f0, 0, r3
; CHECK-BE-P9-NEXT:    lxsihzx f1, 0, r3
; CHECK-BE-P9-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lhz r4, 0(r3)
; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r3)
; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r4
; CHECK-AIX-64-P8-NEXT:    mtfprwz f1, r3
; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-AIX-64-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lxsihzx f0, 0, r3
; CHECK-AIX-64-P9-NEXT:    lxsihzx f1, 0, r3
; CHECK-AIX-64-P9-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
; CHECK-AIX-32-P8-NEXT:    mtfprwz f0, r4
; CHECK-AIX-32-P8-NEXT:    mtfprwz f1, r3
; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v8i16_v8i16:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lxsihzx f0, 0, r3
; CHECK-AIX-32-P9-NEXT:    lxsihzx f1, 0, r3
; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Operand 0: i16 scalar (from an undef pointer) in lane 0 of <8 x i16>.
  %0 = load <2 x i8>, ptr undef, align 1
  %tmp0_1 = bitcast <2 x i8> %0 to i16
  %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0
  %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32>
  ; Operand 1: same construction, with the scalar loaded from %a.
  %1 = load <2 x i8>, ptr %a, align 1
  %tmp1_1 = bitcast <2 x i8> %1 to i16
  %tmp1_2 = insertelement <8 x i16> undef, i16 %tmp1_1, i32 0
  %tmp1_3 = bitcast <8 x i16> %tmp1_2 to <4 x i32>
  ; Result lanes: op0[0], op1[0], op0[1], op1[1].
  %2 = shufflevector <4 x i32> %tmp0_3, <4 x i32> %tmp1_3, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  store <4 x i32> %2, ptr undef, align 4
  ret void
}

; Interleaving shuffle of two <4 x i32> values built from scalars of
; different widths.
;   operand 0: an i16 (loaded as <2 x i8> through an undef pointer) placed in
;              lane 0 of an otherwise undef <8 x i16>, viewed as <4 x i32>.
;   operand 1: an i32 (loaded as <2 x i16> from %a, align 4) placed in lane 0
;              of an otherwise undef <4 x i32>.
; The mask <0, 4, 1, 5> alternates lanes 0 and 1 of the two operands.
; The result is stored through an undef pointer.
define void @test_v8i16_v4i32(ptr %a) {
; CHECK-LE-P8-LABEL: test_v8i16_v4i32:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    lhz r4, 0(r3)
; CHECK-LE-P8-NEXT:    lfiwzx f0, 0, r3
; CHECK-LE-P8-NEXT:    mtfprd f1, r4
; CHECK-LE-P8-NEXT:    xxswapd vs0, f0
; CHECK-LE-P8-NEXT:    xxswapd vs1, vs1
; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs0, vs1
; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v8i16_v4i32:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-LE-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-LE-P9-NEXT:    xxswapd vs0, f0
; CHECK-LE-P9-NEXT:    vsplth v2, v2, 3
; CHECK-LE-P9-NEXT:    xxmrglw vs0, vs0, v2
; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v8i16_v4i32:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lfiwzx f0, 0, r3
; CHECK-BE-P8-NEXT:    lhz r4, 0(r3)
; CHECK-BE-P8-NEXT:    sldi r3, r4, 48
; CHECK-BE-P8-NEXT:    xxsldwi vs0, f0, f0, 1
; CHECK-BE-P8-NEXT:    mtfprd f1, r3
; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-BE-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v8i16_v4i32:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-BE-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-BE-P9-NEXT:    xxsldwi vs0, f0, f0, 1
; CHECK-BE-P9-NEXT:    vsplth v2, v2, 3
; CHECK-BE-P9-NEXT:    xxmrghw vs0, v2, vs0
; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r3
; CHECK-AIX-64-P8-NEXT:    lhz r4, 0(r3)
; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 48
; CHECK-AIX-64-P8-NEXT:    xxsldwi vs0, f0, f0, 1
; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-AIX-64-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-AIX-64-P9-NEXT:    xxsldwi vs0, f0, f0, 1
; CHECK-AIX-64-P9-NEXT:    vsplth v2, v2, 3
; CHECK-AIX-64-P9-NEXT:    xxmrghw vs0, v2, vs0
; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r4
; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lhz r4, 0(r3)
; CHECK-AIX-32-P9-NEXT:    sth r4, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
; CHECK-AIX-32-P9-NEXT:    lxv vs0, -32(r1)
; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv vs1, -16(r1)
; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Operand 0: i16 scalar in lane 0 of an undef <8 x i16>, viewed as <4 x i32>.
  %0 = load <2 x i8>, ptr undef, align 1
  %tmp0_1 = bitcast <2 x i8> %0 to i16
  %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0
  %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32>
  ; Operand 1: i32 scalar in lane 0 of an undef <4 x i32>.
  %1 = load <2 x i16>, ptr %a, align 4
  %tmp1_1 = bitcast <2 x i16> %1 to i32
  %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0
  ; Result lanes: op0[0], op1[0], op0[1], op1[1].
  %2 = shufflevector <4 x i32> %tmp0_3, <4 x i32> %tmp1_2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  store <4 x i32> %2, ptr undef, align 4
  ret void
}

; Shuffle of two vectors that are each built by inserting a loaded scalar into
; element 0 of an otherwise-undef vector:
;   * first operand:  a 2-byte load (align 1, undef address) inserted into
;     <8 x i16> and bitcast to <4 x i32>;
;   * second operand: a 4-byte load from %a (align 8) inserted into <4 x i32>.
; The mask <0, 4, 1, 5> interleaves elements 0 and 1 of the first operand with
; elements 0 and 1 of the second. The result is stored to an undef address.
; NOTE(review): the function name presumably encodes the vector types the
; backend picks for the two inserted scalars - confirm against the lowering.
; The assertion lines below are autogenerated (see the note at the top of the
; file); regenerate them with the update script instead of editing by hand.
define void @test_v8i16_v2i64(ptr %a) {
; CHECK-LE-P8-LABEL: test_v8i16_v2i64:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    lhz r4, 0(r3)
; CHECK-LE-P8-NEXT:    lfdx f0, 0, r3
; CHECK-LE-P8-NEXT:    mtfprd f1, r4
; CHECK-LE-P8-NEXT:    xxswapd vs0, f0
; CHECK-LE-P8-NEXT:    xxswapd vs1, vs1
; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs0, vs1
; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v8i16_v2i64:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-LE-P9-NEXT:    lfd f0, 0(r3)
; CHECK-LE-P9-NEXT:    xxswapd vs0, f0
; CHECK-LE-P9-NEXT:    vsplth v2, v2, 3
; CHECK-LE-P9-NEXT:    xxmrglw vs0, vs0, v2
; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v8i16_v2i64:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lhz r4, 0(r3)
; CHECK-BE-P8-NEXT:    lfdx f0, 0, r3
; CHECK-BE-P8-NEXT:    sldi r3, r4, 48
; CHECK-BE-P8-NEXT:    mtfprd f1, r3
; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-BE-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v8i16_v2i64:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-BE-P9-NEXT:    lfd f0, 0(r3)
; CHECK-BE-P9-NEXT:    vsplth v2, v2, 3
; CHECK-BE-P9-NEXT:    xxmrghw vs0, v2, vs0
; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_v2i64:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lhz r4, 0(r3)
; CHECK-AIX-64-P8-NEXT:    lfdx f0, 0, r3
; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 48
; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-AIX-64-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v8i16_v2i64:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-AIX-64-P9-NEXT:    lfd f0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    vsplth v2, v2, 3
; CHECK-AIX-64-P9-NEXT:    xxmrghw vs0, v2, vs0
; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r4
; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lhz r4, 0(r3)
; CHECK-AIX-32-P9-NEXT:    sth r4, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
; CHECK-AIX-32-P9-NEXT:    lxv vs0, -32(r1)
; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv vs1, -16(r1)
; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; First shuffle operand: 16-bit value placed in element 0 of <8 x i16>.
  %0 = load <2 x i8>, ptr undef, align 1
  %tmp0_1 = bitcast <2 x i8> %0 to i16
  %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0
  %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32>
  ; Second shuffle operand: 32-bit value (8-byte aligned load) in element 0.
  %1 = load <2 x i16>, ptr %a, align 8
  %tmp1_1 = bitcast <2 x i16> %1 to i32
  %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0
  ; Interleave: result = { op0[0], op1[0], op0[1], op1[1] }.
  %2 = shufflevector <4 x i32> %tmp0_3, <4 x i32> %tmp1_2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  store <4 x i32> %2, ptr undef, align 4
  ret void
}

; Builds a <16 x i8> result from two 4-byte loads (%a and %b):
;   1. the two <4 x i8> values are concatenated into <8 x i8> (mask 0..7);
;   2. that <8 x i8> is concatenated with itself into <16 x i8> (mask 0..15).
; Unlike the store-based tests in this file, the result is returned by value.
; NOTE(review): the function name presumably reflects that both loaded values
; are treated as 32-bit scalars placed in <4 x i32> vectors - confirm.
; The assertion lines below are autogenerated (see the note at the top of the
; file); regenerate them with the update script instead of editing by hand.
define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) {
; CHECK-LE-P8-LABEL: test_v4i32_v4i32:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI9_0@toc@ha
; CHECK-LE-P8-NEXT:    lxsiwzx v2, 0, r3
; CHECK-LE-P8-NEXT:    lxsiwzx v3, 0, r4
; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI9_0@toc@l
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v4i32_v4i32:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lxsiwzx v2, 0, r3
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
; CHECK-LE-P9-NEXT:    lfiwzx f0, 0, r4
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI9_0@toc@l
; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v4i32_v4i32:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI9_0@toc@ha
; CHECK-BE-P8-NEXT:    lxsiwzx v2, 0, r3
; CHECK-BE-P8-NEXT:    lxsiwzx v3, 0, r4
; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI9_0@toc@l
; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r5
; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v4i32_v4i32:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
; CHECK-BE-P9-NEXT:    lxsiwzx v2, 0, r4
; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI9_0@toc@l
; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    ld r5, L..C4(r2) # %const.0
; CHECK-AIX-64-P8-NEXT:    lxsiwzx v2, 0, r3
; CHECK-AIX-64-P8-NEXT:    lxsiwzx v3, 0, r4
; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r5
; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v4i32_v4i32:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-AIX-64-P9-NEXT:    ld r3, L..C3(r2) # %const.0
; CHECK-AIX-64-P9-NEXT:    lxsiwzx v2, 0, r4
; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lwz r5, L..C4(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r5
; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v4i32_v4i32:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C3(r2) # %const.0
; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r4
; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Two 4-byte loads with no explicit alignment.
  %load1 = load <4 x i8>, ptr %a
  %load2 = load <4 x i8>, ptr %b
  ; Concatenate %load1 and %load2 into eight bytes.
  %shuffle1 = shufflevector <4 x i8> %load1, <4 x i8> %load2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ; Concatenate the eight bytes with themselves to fill a 16-byte vector.
  %shuffle2 = shufflevector <8 x i8> %shuffle1, <8 x i8> %shuffle1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %shuffle2
}

; Shuffle of two vectors that are each built by inserting a loaded scalar into
; element 0 of an otherwise-undef vector:
;   * first operand:  a 4-byte load from %a (align 4) inserted into <4 x i32>;
;   * second operand: a 2-byte load (align 1, undef address) inserted into
;     <8 x i16> and bitcast to <4 x i32>.
; The mask <0, 4, 1, 5> interleaves elements 0 and 1 of the first operand with
; elements 0 and 1 of the second. The result is stored to an undef address.
; NOTE(review): the function name presumably encodes the vector types the
; backend picks for the two inserted scalars - confirm against the lowering.
; The assertion lines below are autogenerated (see the note at the top of the
; file); regenerate them with the update script instead of editing by hand.
define void @test_v4i32_v8i16(ptr %a) {
; CHECK-LE-P8-LABEL: test_v4i32_v8i16:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    lhz r4, 0(r3)
; CHECK-LE-P8-NEXT:    lfiwzx f0, 0, r3
; CHECK-LE-P8-NEXT:    mtfprd f1, r4
; CHECK-LE-P8-NEXT:    xxswapd vs0, f0
; CHECK-LE-P8-NEXT:    xxswapd vs1, vs1
; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs1, vs0
; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v4i32_v8i16:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-LE-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-LE-P9-NEXT:    xxswapd vs0, f0
; CHECK-LE-P9-NEXT:    vsplth v2, v2, 3
; CHECK-LE-P9-NEXT:    xxmrglw vs0, v2, vs0
; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v4i32_v8i16:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lfiwzx f0, 0, r3
; CHECK-BE-P8-NEXT:    lhz r4, 0(r3)
; CHECK-BE-P8-NEXT:    sldi r3, r4, 48
; CHECK-BE-P8-NEXT:    xxsldwi vs0, f0, f0, 1
; CHECK-BE-P8-NEXT:    mtfprd f1, r3
; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-BE-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v4i32_v8i16:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-BE-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-BE-P9-NEXT:    xxsldwi vs0, f0, f0, 1
; CHECK-BE-P9-NEXT:    vsplth v2, v2, 3
; CHECK-BE-P9-NEXT:    xxmrghw vs0, vs0, v2
; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r3
; CHECK-AIX-64-P8-NEXT:    lhz r4, 0(r3)
; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 48
; CHECK-AIX-64-P8-NEXT:    xxsldwi vs0, f0, f0, 1
; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-AIX-64-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-AIX-64-P9-NEXT:    xxsldwi vs0, f0, f0, 1
; CHECK-AIX-64-P9-NEXT:    vsplth v2, v2, 3
; CHECK-AIX-64-P9-NEXT:    xxmrghw vs0, vs0, v2
; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r4
; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lhz r4, 0(r3)
; CHECK-AIX-32-P9-NEXT:    sth r4, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
; CHECK-AIX-32-P9-NEXT:    lxv vs0, -32(r1)
; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv vs1, -16(r1)
; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Second shuffle operand: 16-bit value placed in element 0 of <8 x i16>.
  %0 = load <2 x i8>, ptr undef, align 1
  %tmp0_1 = bitcast <2 x i8> %0 to i16
  %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0
  %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32>
  ; First shuffle operand: 32-bit value (4-byte aligned load) in element 0.
  %1 = load <2 x i16>, ptr %a, align 4
  %tmp1_1 = bitcast <2 x i16> %1 to i32
  %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0
  ; Interleave: result = { op0[0], op1[0], op0[1], op1[1] }.
  %2 = shufflevector <4 x i32> %tmp1_2, <4 x i32> %tmp0_3, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  store <4 x i32> %2, ptr undef, align 4
  ret void
}

; Shuffle of two <4 x i32> vectors that are each built by inserting a loaded
; 32-bit scalar into element 0 of an otherwise-undef vector:
;   * first operand:  a 4-byte load from %a, align 4;
;   * second operand: a 4-byte load from an undef address, align 8.
; The two loads differ only in address and declared alignment. The mask
; <0, 4, 1, 5> interleaves elements 0 and 1 of the first operand with
; elements 0 and 1 of the second. The result is stored to an undef address.
; NOTE(review): the function name presumably encodes the vector types the
; backend picks for the two inserted scalars - confirm against the lowering.
; The assertion lines below are autogenerated (see the note at the top of the
; file); regenerate them with the update script instead of editing by hand.
define void @test_v4i32_v2i64(ptr %a) {
; CHECK-LE-P8-LABEL: test_v4i32_v2i64:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    lfdx f0, 0, r3
; CHECK-LE-P8-NEXT:    lfiwzx f1, 0, r3
; CHECK-LE-P8-NEXT:    xxswapd vs0, f0
; CHECK-LE-P8-NEXT:    xxswapd vs1, f1
; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs0, vs1
; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v4i32_v2i64:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lfd f0, 0(r3)
; CHECK-LE-P9-NEXT:    lfiwzx f1, 0, r3
; CHECK-LE-P9-NEXT:    xxswapd vs0, f0
; CHECK-LE-P9-NEXT:    xxswapd vs1, f1
; CHECK-LE-P9-NEXT:    xxmrglw vs0, vs0, vs1
; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v4i32_v2i64:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lfiwzx f1, 0, r3
; CHECK-BE-P8-NEXT:    lfdx f0, 0, r3
; CHECK-BE-P8-NEXT:    xxsldwi vs1, f1, f1, 1
; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-BE-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v4i32_v2i64:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lfiwzx f1, 0, r3
; CHECK-BE-P9-NEXT:    lfd f0, 0(r3)
; CHECK-BE-P9-NEXT:    xxsldwi vs1, f1, f1, 1
; CHECK-BE-P9-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lfiwzx f1, 0, r3
; CHECK-AIX-64-P8-NEXT:    lfdx f0, 0, r3
; CHECK-AIX-64-P8-NEXT:    xxsldwi vs1, f1, f1, 1
; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-AIX-64-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v4i32_v2i64:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lfiwzx f1, 0, r3
; CHECK-AIX-64-P9-NEXT:    lfd f0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    xxsldwi vs1, f1, f1, 1
; CHECK-AIX-64-P9-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lwz r4, 0(r3)
; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r4
; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lwz r4, 0(r3)
; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
; CHECK-AIX-32-P9-NEXT:    stw r4, -16(r1)
; CHECK-AIX-32-P9-NEXT:    stw r3, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv vs1, -32(r1)
; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Second shuffle operand: 32-bit value from an 8-byte aligned load.
  %0 = load <2 x i16>, ptr undef, align 8
  %tmp0_1 = bitcast <2 x i16> %0 to i32
  %tmp0_2 = insertelement <4 x i32> undef, i32 %tmp0_1, i32 0
  ; First shuffle operand: 32-bit value from a 4-byte aligned load.
  %1 = load <2 x i16>, ptr %a, align 4
  %tmp1_1 = bitcast <2 x i16> %1 to i32
  %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0
  ; Interleave: result = { op0[0], op1[0], op0[1], op1[1] }.
  %2 = shufflevector <4 x i32> %tmp1_2, <4 x i32> %tmp0_2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  store <4 x i32> %2, ptr undef, align 4
  ret void
}

; Interleaves two 8-byte <2 x i32> loads into one <4 x i32>:
;   * first operand:  loaded from an undef address, align 4;
;   * second operand: loaded from %a, align 4.
; The mask <0, 2, 1, 3> yields { op0[0], op1[0], op0[1], op1[1] }. No scalar
; insertelement is involved here; the loads feed the shuffle directly. The
; result is stored to an undef address.
; NOTE(review): the function name presumably reflects that each 8-byte load is
; treated as a 64-bit scalar placed in a <2 x i64> vector - confirm.
; The assertion lines below are autogenerated (see the note at the top of the
; file); regenerate them with the update script instead of editing by hand.
define void @test_v2i64_v2i64(ptr %a) {
; CHECK-LE-P8-LABEL: test_v2i64_v2i64:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    lfdx f0, 0, r3
; CHECK-LE-P8-NEXT:    lfdx f1, 0, r3
; CHECK-LE-P8-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v2i64_v2i64:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lfd f0, 0(r3)
; CHECK-LE-P9-NEXT:    lfd f1, 0(r3)
; CHECK-LE-P9-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v2i64_v2i64:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lfdx f0, 0, r3
; CHECK-BE-P8-NEXT:    lfdx f1, 0, r3
; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-BE-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v2i64_v2i64:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lfd f0, 0(r3)
; CHECK-BE-P9-NEXT:    lfd f1, 0(r3)
; CHECK-BE-P9-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v2i64_v2i64:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lfdx f0, 0, r3
; CHECK-AIX-64-P8-NEXT:    lfdx f1, 0, r3
; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-AIX-64-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v2i64_v2i64:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lfd f0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    lfd f1, 0(r3)
; CHECK-AIX-64-P9-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lwz r5, 4(r3)
; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -16
; CHECK-AIX-32-P8-NEXT:    stw r5, -16(r1)
; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r4
; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
; CHECK-AIX-32-P8-NEXT:    lfiwzx f2, 0, r3
; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs2, 1
; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lwz r4, 4(r3)
; CHECK-AIX-32-P9-NEXT:    stw r4, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
; CHECK-AIX-32-P9-NEXT:    stw r3, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lxv vs1, -32(r1)
; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P9-NEXT:    lxvwsx vs1, 0, r3
; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Both operands are whole 8-byte vector loads with 4-byte alignment.
  %0 = load <2 x i32>, ptr undef, align 4
  %1 = load <2 x i32>, ptr %a, align 4
  ; Interleave the two 2-element inputs into one 4-element result.
  %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  store <4 x i32> %2, ptr undef, align 4
  ret void
}

; Shuffle of two <4 x i32> vectors that are each built by inserting a loaded
; 32-bit scalar into element 0 of an otherwise-undef vector:
;   * first operand:  a 4-byte load from an undef address, align 8;
;   * second operand: a 4-byte load from %a, align 4.
; This uses the same two inputs as test_v4i32_v2i64 with the shuffle operands
; in the opposite order. The mask <0, 4, 1, 5> interleaves elements 0 and 1
; of the first operand with elements 0 and 1 of the second. The result is
; stored to an undef address.
; NOTE(review): the function name presumably encodes the vector types the
; backend picks for the two inserted scalars - confirm against the lowering.
; The assertion lines below are autogenerated (see the note at the top of the
; file); regenerate them with the update script instead of editing by hand.
define void @test_v2i64_v4i32(ptr %a) {
; CHECK-LE-P8-LABEL: test_v2i64_v4i32:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    lfdx f0, 0, r3
; CHECK-LE-P8-NEXT:    lfiwzx f1, 0, r3
; CHECK-LE-P8-NEXT:    xxswapd vs0, f0
; CHECK-LE-P8-NEXT:    xxswapd vs1, f1
; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs1, vs0
; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v2i64_v4i32:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lfd f0, 0(r3)
; CHECK-LE-P9-NEXT:    lfiwzx f1, 0, r3
; CHECK-LE-P9-NEXT:    xxswapd vs0, f0
; CHECK-LE-P9-NEXT:    xxswapd vs1, f1
; CHECK-LE-P9-NEXT:    xxmrglw vs0, vs1, vs0
; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v2i64_v4i32:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lfiwzx f1, 0, r3
; CHECK-BE-P8-NEXT:    lfdx f0, 0, r3
; CHECK-BE-P8-NEXT:    xxsldwi vs1, f1, f1, 1
; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-BE-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v2i64_v4i32:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lfiwzx f1, 0, r3
; CHECK-BE-P9-NEXT:    lfd f0, 0(r3)
; CHECK-BE-P9-NEXT:    xxsldwi vs1, f1, f1, 1
; CHECK-BE-P9-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v2i64_v4i32:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lfiwzx f1, 0, r3
; CHECK-AIX-64-P8-NEXT:    lfdx f0, 0, r3
; CHECK-AIX-64-P8-NEXT:    xxsldwi vs1, f1, f1, 1
; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-AIX-64-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v2i64_v4i32:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lfiwzx f1, 0, r3
; CHECK-AIX-64-P9-NEXT:    lfd f0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    xxsldwi vs1, f1, f1, 1
; CHECK-AIX-64-P9-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lwz r4, 0(r3)
; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    stw r4, -32(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r4
; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v2i64_v4i32:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lwz r4, 0(r3)
; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    stw r4, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv vs1, -32(r1)
; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; First shuffle operand: 32-bit value from an 8-byte aligned load.
  %0 = load <2 x i16>, ptr undef, align 8
  %tmp0_1 = bitcast <2 x i16> %0 to i32
  %tmp0_2 = insertelement <4 x i32> undef, i32 %tmp0_1, i32 0
  ; Second shuffle operand: 32-bit value from a 4-byte aligned load.
  %1 = load <2 x i16>, ptr %a, align 4
  %tmp1_1 = bitcast <2 x i16> %1 to i32
  %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0
  ; Interleave: result = { op0[0], op1[0], op0[1], op1[1] }.
  %2 = shufflevector <4 x i32> %tmp0_2, <4 x i32> %tmp1_2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  store <4 x i32> %2, ptr undef, align 4
  ret void
}

; Shuffle of two vectors that are each built by inserting a loaded scalar into
; element 0 of an otherwise-undef vector:
;   * first operand:  a 4-byte load from %a (align 8) inserted into <4 x i32>;
;   * second operand: a 2-byte load (align 1, undef address) inserted into
;     <8 x i16> and bitcast to <4 x i32>.
; This uses the same two inputs as test_v8i16_v2i64 with the shuffle operands
; in the opposite order. The mask <0, 4, 1, 5> interleaves elements 0 and 1
; of the first operand with elements 0 and 1 of the second. The result is
; stored to an undef address.
; NOTE(review): the function name presumably encodes the vector types the
; backend picks for the two inserted scalars - confirm against the lowering.
; The assertion lines below are autogenerated (see the note at the top of the
; file); regenerate them with the update script instead of editing by hand.
define void @test_v2i64_v8i16(ptr %a) {
; CHECK-LE-P8-LABEL: test_v2i64_v8i16:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    lhz r4, 0(r3)
; CHECK-LE-P8-NEXT:    lfdx f0, 0, r3
; CHECK-LE-P8-NEXT:    mtfprd f1, r4
; CHECK-LE-P8-NEXT:    xxswapd vs0, f0
; CHECK-LE-P8-NEXT:    xxswapd vs1, vs1
; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs1, vs0
; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v2i64_v8i16:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-LE-P9-NEXT:    lfd f0, 0(r3)
; CHECK-LE-P9-NEXT:    xxswapd vs0, f0
; CHECK-LE-P9-NEXT:    vsplth v2, v2, 3
; CHECK-LE-P9-NEXT:    xxmrglw vs0, v2, vs0
; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v2i64_v8i16:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lhz r4, 0(r3)
; CHECK-BE-P8-NEXT:    lfdx f0, 0, r3
; CHECK-BE-P8-NEXT:    sldi r3, r4, 48
; CHECK-BE-P8-NEXT:    mtfprd f1, r3
; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-BE-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v2i64_v8i16:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-BE-P9-NEXT:    lfd f0, 0(r3)
; CHECK-BE-P9-NEXT:    vsplth v2, v2, 3
; CHECK-BE-P9-NEXT:    xxmrghw vs0, vs0, v2
; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v2i64_v8i16:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lhz r4, 0(r3)
; CHECK-AIX-64-P8-NEXT:    lfdx f0, 0, r3
; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 48
; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs0, vs1
; CHECK-AIX-64-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v2i64_v8i16:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-AIX-64-P9-NEXT:    lfd f0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    vsplth v2, v2, 3
; CHECK-AIX-64-P9-NEXT:    xxmrghw vs0, vs0, v2
; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r4
; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lhz r4, 0(r3)
; CHECK-AIX-32-P9-NEXT:    sth r4, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
; CHECK-AIX-32-P9-NEXT:    lxv vs0, -32(r1)
; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv vs1, -16(r1)
; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Second shuffle operand: 16-bit value placed in element 0 of <8 x i16>.
  %0 = load <2 x i8>, ptr undef, align 1
  %tmp0_1 = bitcast <2 x i8> %0 to i16
  %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0
  %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32>
  ; First shuffle operand: 32-bit value (8-byte aligned load) in element 0.
  %1 = load <2 x i16>, ptr %a, align 8
  %tmp1_1 = bitcast <2 x i16> %1 to i32
  %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0
  ; Interleave: result = { op0[0], op1[0], op0[1], op1[1] }.
  %2 = shufflevector <4 x i32> %tmp1_2, <4 x i32> %tmp0_3, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  store <4 x i32> %2, ptr undef, align 4
  ret void
}
